## --------------------------------------- ##
## simulation setup: 
##  delta = n / p
##  rho = k / n
## --------------------------------------- ##

## --------------------------------------- ##
## cluster setup 
## --------------------------------------- ##
## i <- as.numeric(Sys.getenv("SLURM_ARRAY_TASK_ID")) # this should be 1--50
for(i in 1:length.out){

  cl <- makeCluster(detectCores()/2)
  registerDoParallel(cl)
  

## draw error term and n 
set.seed(3893)
n    <- floor(p * delta[i])
eta  <- rnorm(n)
etaz <- sigma * eta 
n_break <- floor(n / 2)

# doRNGseed(1234)
sim_out <- foreach(j = 1:length(rho), .combine = "rbind", .packages= c("BridgeChange", "glmnet", "MCMCpack", "mvnfast")) %dorng% ({
    
    # sim_out <- matrix(NA, nrow = length(rho), ncol = 3)
    # for (j in 1:length(rho)) {
        cat("Now at i = ", i, " j = ", j ,"\n")
        k <- ceiling(n * rho[j])

        l2_error <- save_lasso_ora <- save_bridge <- save_blasso <- save_ridge <- save_en  <- rep(NA, n_iter)
        for (iter in 1:n_iter) {
          rm(y); rm(X);
            ## gen data ***************************************************************
            X <<- as.matrix(faux::rnorm_multi(n = n,  mu = rep(0, p), r = 0.7))
           
            # regime specific coef
            beta1 <- sample(c(rnorm(k, 0, 3), rep(0, p - k)))
            beta2 <- sample(c(rnorm(k, 0, 3), rep(0, p - k)))
            beta  <- c(beta1, beta2)
           
            # draw data
            y <- rep(NA, n)
            y[1:n_break] <- X[1:n_break,] %*% beta1 + etaz[1:n_break]
            y[(n_break+1):n] <- X[(n_break+1):n, ] %*% beta2 + etaz[(n_break+1):n]
            y <<- y

            ## fit lasso ***************************************************************

            ## 1) residual break test and recover break point ************************
            # out         <- cv.glmnet(y = y, x= X, alpha = 1, nfolds = 3)
            # la_coef     <- coef(out, s = "lambda.min")
            # out         <- rlasso(y = y, x = X)
            # la_coef     <- coef(out)
            out         <- glmnet(y = y, x= X, alpha = 1)
            BIC         <- deviance(out) / var(y) + log(n) * out$df / n
            la_coef     <- coef(out)[,which.min(BIC)]

            resid       <- y - (X %*%  la_coef[-1] + la_coef[1])
            resid_break <- MCMCpack::MCMCresidualBreakAnalysis(resid)
            state_id    <- round(apply(attr(resid_break, "s.store"), 2, median))

            if (sum(table(state_id) < 2) >= 1) {
                state_id[2] <- 1; state_id[n-1] <- 2
            }

            # out_la1     <- cv.glmnet(y = y[state_id == 1], x = X[state_id == 1,], alpha = 1, nfolds = 3)
            # out_la2     <- cv.glmnet(y = y[state_id == 2], x = X[state_id == 2,], alpha = 1, nfolds = 3)
            # out_la1 <- regress(y =  y[state_id == 1], X = X[state_id == 1,], method = "lasso", validation = "LOO")
            # out_la2 <- regress(y =  y[state_id == 2], X = X[state_id == 2,], method = "lasso", validation = "LOO")
            # out_la1    <- rlasso(y = y[state_id == 1], x = X[state_id == 1,])
            # out_la2    <- rlasso(y = y[state_id == 2], x = X[state_id == 2,])

            out_la1     <- glmnet(y = y[state_id == 1], x = X[state_id == 1,])
            out_la2     <- glmnet(y = y[state_id == 2], x = X[state_id == 2,])
            BIC1 <- deviance(out_la1) / var(y[state_id==1]) + log(sum(state_id==1)) * out_la1$df / sum(state_id==1)
            BIC2 <- deviance(out_la2) / var(y[state_id==2]) + log(sum(state_id==2)) * out_la2$df / sum(state_id==2)

            # compute L2 error for prediction
            # la_coef_full   <- c(coef(out_la1, s = "lambda.min")[-1,], coef(out_la2, s = "lambda.min")[-1,])
            la_predict1 <- X[state_id == 1,] %*% coef(out_la1)[-1, which.min(BIC1)] + coef(out_la1)[1, which.min(BIC1)]
            la_predict2 <- X[state_id == 2,] %*% coef(out_la2)[-1, which.min(BIC2)] + coef(out_la2)[1, which.min(BIC2)]
            la_pred_full   <- c(la_predict1, la_predict2)
            l2_error[iter] <- sqrt(sum((la_pred_full - y)^2)) / n #/ sqrt(sum(beta^2))

            ## 2) oracle break point (fit two different lasso) *************************
            # out_la1_ora  <- cv.glmnet(y = y[1:n_break], x = X[1:n_break,], alpha = 1, nfolds = 3)
            # out_la2_ora  <- cv.glmnet(y = y[(n_break+1):n], x = X[(n_break+1):n,], alpha = 1, nfolds = 3)
            # out_la1_ora  <- rlasso(y = y[1:n_break], x = X[1:n_break,])
            # out_la2_ora  <- rlasso(y = y[(n_break+1):n], x = X[(n_break+1):n,])
            out_la1_ora  <- glmnet(y = y[1:n_break], x = X[1:n_break,], alpha = 1)
            out_la2_ora  <- glmnet(y = y[(n_break+1):n], x = X[(n_break+1):n,], alpha = 1)
            BIC1_ora     <- deviance(out_la1_ora) / var(y[1:n_break]) + log(n_break) * out_la1_ora$df / n_break
            BIC2_ora     <- deviance(out_la2_ora) / var(y[(n_break+1):n]) + log(n-n_break) * out_la2_ora$df / (n - n_break)

            # compute L2 error
            la_pred1_ora <- X[1:n_break,] %*% coef(out_la1_ora)[-1, which.min(BIC1_ora)] + coef(out_la1_ora)[1, which.min(BIC1_ora)]
            la_pred2_ora <- X[(n_break+1):n,] %*% coef(out_la2_ora)[-1, which.min(BIC2_ora)] + coef(out_la2_ora)[1, which.min(BIC2_ora)]
            la_pred_ora_full <- c(la_pred1_ora, la_pred2_ora)
            save_lasso_ora[iter] <- sqrt(sum((la_pred_ora_full - y)^2)) / n


            ## fit SparseChange ********************************************************
            out1 <- BridgeChangeReg(y~X, mcmc= 100, burn = 100, thin=1, verbose=0, n.break = 1)
            beta.bridge  <- coef_bridge(out1) #, unlist(summarize_coef(out1, X, y))
            beta0.bridge <- apply(attr(out1, 'intercept'), 2, mean)
            state_id     <- round(apply(attr(out1, "s.store"), 2, median))
            bridge_pred  <- c(
                X[state_id == 1, ] %*% beta.bridge[1:200] + beta0.bridge[1],
                X[state_id == 2, ] %*% beta.bridge[201:400] + beta0.bridge[2]
            )
            save_bridge[iter]  <- sqrt(sum((bridge_pred - y)^2)) / n

            # ## fit bayesian lasso *************************************************
            # out_blasso <- blasso(X = X, y = y, T = 200)
            # beta.blasso <- apply(out_blasso$beta[101:100, ], 2, mean)
            # save_blasso[iter]  <- sqrt(sum((beta - beta.blasso)^2)) / sqrt(sum(beta^2))

            # ## fit ridge ********************************************************
            # rg_out <- cv.glmnet(y = y, x= X, alpha = 0, nfolds = 3)
            # save_ridge[iter] <- sqrt(sum((coef(rg_out, s = "lambda.min")[-1,] - beta)^2)) / sqrt(sum(beta^2))
            #
            # ## fit elastic net **************************************************
            # en_out <- cv.glmnet(y = y, x= X, alpha = 0.5, nfolds = 3)
            # save_en[iter] <- sqrt(sum((coef(en_out, s = "lambda.min")[-1,] - beta)^2)) / sqrt(sum(beta^2))

        }

        mse.lasso     <- median(l2_error, na.rm = TRUE)
        mse.bridge    <- median(save_bridge, na.rm = TRUE)
        mse.lasso_ora <- median(save_lasso_ora, na.rm = TRUE)
        # mse.blasso    <- median(save_blasso, na.rm = TRUE)
        # mse.ridge  <- median(save_ridge, na.rm = TRUE)
        # mse.elastic <- median(save_en, na.rm = TRUE)


        out <- c(mse.lasso, mse.lasso_ora, mse.bridge) #, mse.ridge, mse.elastic)
        out


})
    cat("\nCurrent simulation is ", i, " iteration.\n")
    saveRDS(sim_out, file = paste("./change/corr07/pred/res/sim_pred_change_mse_corr07_", i, ".rds", sep = ''))
    
    
    stopCluster(cl)
    stopImplicitCluster()
}
